# -*- coding: utf-8 -*-
import scrapy
from spCrawl.items import DongguanItem
from scrapy.log import logger

class SunwzSpider(scrapy.Spider):
    """Crawl the paginated question list on wz.sun0769.com.

    ``parse`` handles list pages: it schedules one request per detail link
    and then the next list page. ``parse_item`` handles detail pages and
    yields one ``DongguanItem`` per page.
    """

    name = 'sunwz'
    allowed_domains = ['wz.sun0769.com']
    # Base URL of the list page; the current offset is appended to it.
    url = "http://wz.sun0769.com/index.php/question/questionType?type=4&page="
    offset = 0
    # The offset advances by ``page_step`` per list page and pagination stops
    # once it reaches ``max_offset``. (Original note: the maximum is 95610.)
    page_step = 30
    max_offset = 300
    # Initial URL to crawl.
    start_urls = [url + str(offset)]

    def parse(self, response):
        """Parse a list page.

        Yields a request for every detail link found on the page (handled by
        ``parse_item``), followed by a request for the next list page while
        ``self.offset`` is still below ``self.max_offset``.
        """
        links = response.xpath(
            "//div[@class='greyframe']/table//td/a[@class='news14']/@href"
        ).extract()
        self.logger.info(links)
        for link in links:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the page URL, so Request never sees a bare path.
            yield scrapy.Request(response.urljoin(link), callback=self.parse_item)

        # Only schedule another list page while below the cap; yielding
        # unconditionally would re-request the last page forever and rely on
        # the duplicate filter to stop the crawl.
        if self.offset < self.max_offset:
            self.offset += self.page_step
            yield scrapy.Request(
                url=self.url + str(self.offset), callback=self.parse
            )

    def parse_item(self, response):
        """Parse a detail page and yield a populated ``DongguanItem``.

        Pages without a title node are skipped with a warning instead of
        raising ``IndexError``.
        """
        title = response.xpath(
            "//div[contains(@class, 'pagecenter p3')]//strong/text()"
        ).extract_first()
        if title is None:
            self.logger.warning("No title found on %s; skipping", response.url)
            return

        item = DongguanItem()
        item["title"] = title
        # NOTE(review): "number" is stored as a copy of the full title;
        # presumably it was meant to be parsed out of it - confirm.
        item["number"] = title
        item["content"] = response.xpath(
            "//div[@class='contentext']/text()"
        ).extract()
        item["url"] = response.url
        yield item

